# ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 nohup torchrun --nproc_per_node=4 train_sft.py > ttt.out &
# nohup torchrun --nproc_per_node=16 train_onlinedpo1.py > qwen2.5vl3b.out &
# nohup torchrun --nproc_per_node=8 train_grpo.py --loss_type grpo --output_dir output_grpo_qwen2_5vl_7b > qwen2.5vl7b-grpo.out &
# torchrun --nproc_per_node=8 train_grpo.py --loss_type grpo --output_dir output_grpo_qwen2_5vl_7b > qwen2.5vl7b-grpo.out
# torchrun --nproc_per_node=8 train_grpo.py --loss_type dapo --output_dir output_dapo_qwen2_5vl_7b > qwen2.5vl7b-dapo.out
# torchrun --nproc_per_node=8 train_grpo.py --loss_type dr_grpo --output_dir output_dr_qwen2_5vl_7b > qwen2.5vl7b-dr_grpo.out
# torchrun --nproc_per_node=8 train_sft.py > sft.out
# Launch train_grpo.py once per loss type, strictly one after another
# (each torchrun call blocks until it exits). Every run uses 8 processes
# per node and writes its stdout to its own qwen2.5vl3b-<loss_type>.out file;
# stderr is not redirected here.
# There is no exit-status check between runs, so a failing run does not
# prevent the following ones from starting.
for loss_type in grpo dapo dr_grpo; do
  # The output directory tag equals the loss type, except dr_grpo which
  # uses the shorter tag "dr".
  case "$loss_type" in
    dr_grpo) dir_tag=dr ;;
    *) dir_tag=$loss_type ;;
  esac

  torchrun --nproc_per_node=8 train_grpo.py \
    --loss_type "$loss_type" \
    --output_dir "output_${dir_tag}_qwen2_5vl_3b" \
    > "qwen2.5vl3b-${loss_type}.out"
done